Packages
library(knitr) # html tables
library(gtsummary)
library(dplyr)
FALSE
FALSE Attaching package: 'dplyr'
FALSE The following objects are masked from 'package:stats':
FALSE
FALSE filter, lag
FALSE The following objects are masked from 'package:base':
FALSE
FALSE intersect, setdiff, setequal, union
source(file="RFn_Plot-lmSim.R")
library(leaps)
Data Loading
teaching <- readxl::read_excel('Bteaching.xls')
teaching <- data.frame(teaching)
teaching$GROUP <- factor(teaching$GROUP, level = c('C', 'E'), labels = c('Control', 'Experiment'))
teaching$GENDER <- factor(teaching$GENDER, level = c('M', 'F'), labels = c('Male', 'Female'))
# Don't need this teaching$TAUGHT <- factor(ifelse(teaching$PREF == 'N' | teaching$GROUP == 'Control', 'TR', teaching$PREF), level = c('TR', 'A', 'T', 'K', 'V'), labels = c('Traditional', 'Auditory', 'Tactile', 'Kinesthetic', 'Visual'))
teaching$PREF <- factor(teaching$PREF, level = c('A', 'N', 'T', 'K', 'V'), labels = c('Auditory', 'No Preference', 'Tactile', 'Kinesthetic', 'Visual')) # get understandable lables
teaching$SLEVEL <- factor(teaching$SLEVEL, level = c('E', 'H'), labels = c('Elementary', 'Highschool'))
Data exploratory
kable(summary(teaching))
| GROUP | SESSION | PREF | GENDER | SLEVEL | PRE | POST | ATT | |
|---|---|---|---|---|---|---|---|---|
| Control :43 | Length:87 | Auditory :21 | Male :17 | Elementary:59 | Min. : 5.00 | Min. : 36.00 | Min. :21.00 | |
| Experiment:44 | Class :character | No Preference:32 | Female:70 | Highschool:28 | 1st Qu.: 40.00 | 1st Qu.: 76.00 | 1st Qu.:42.50 | |
| NA | Mode :character | Tactile :23 | NA | NA | Median : 55.00 | Median : 80.00 | Median :50.00 | |
| NA | NA | Kinesthetic : 4 | NA | NA | Mean : 55.63 | Mean : 80.87 | Mean :48.05 | |
| NA | NA | Visual : 7 | NA | NA | 3rd Qu.: 75.00 | 3rd Qu.: 90.00 | 3rd Qu.:55.00 | |
| NA | NA | NA | NA | NA | Max. :100.00 | Max. :100.00 | Max. :60.00 |
teaching$index = 1:nrow(teaching)
tsibble::tsibble(teaching, index = index)
FALSE # A tsibble: 87 x 9 [1]
FALSE GROUP SESSION PREF GENDER SLEVEL PRE POST ATT index
FALSE <fct> <chr> <fct> <fct> <fct> <dbl> <dbl> <dbl> <int>
FALSE 1 Control S1 Auditory Female Elementary 5 60 41 1
FALSE 2 Control S2 Auditory Female Elementary 20 80 56 2
FALSE 3 Control S4 Auditory Female Elementary 95 80 56 3
FALSE 4 Control S1 No Preference Male Elementary 5 36 36 4
FALSE 5 Control S3 No Preference Female Elementary 90 88 40 5
FALSE 6 Control S1 Tactile Female Highschool 85 84 50 6
FALSE 7 Control S2 Tactile Female Elementary 45 80 58 7
FALSE 8 Control S2 Tactile Female Highschool 65 80 56 8
FALSE 9 Control S2 No Preference Female Elementary 55 76 54 9
FALSE 10 Control S3 Tactile Female Elementary 85 84 59 10
FALSE # ℹ 77 more rows
table1 <- tbl_summary(
teaching,
by = GROUP, # split table by group
missing = "no" # don't list missing data separately
) |>
add_n() |> # add column with total number of non-missing observations
add_p() |> # test for a difference between groups
modify_header(label = "**Variable**") |> # update the column header
bold_labels()
FALSE Warning for variable 'PRE':
FALSE simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
FALSE Warning for variable 'POST':
FALSE simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
FALSE Warning for variable 'ATT':
FALSE simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
table1
| Variable | N | Control, N = 431 | Experiment, N = 441 | p-value2 |
|---|---|---|---|---|
| SESSION | 87 | >0.9 | ||
| S1 | 11 (26%) | 11 (25%) | ||
| S2 | 11 (26%) | 12 (27%) | ||
| S3 | 11 (26%) | 11 (25%) | ||
| S4 | 10 (23%) | 10 (23%) | ||
| PREF | 87 | 0.4 | ||
| Auditory | 12 (28%) | 9 (20%) | ||
| No Preference | 17 (40%) | 15 (34%) | ||
| Tactile | 11 (26%) | 12 (27%) | ||
| Kinesthetic | 2 (4.7%) | 2 (4.5%) | ||
| Visual | 1 (2.3%) | 6 (14%) | ||
| GENDER | 87 | 0.4 | ||
| Male | 10 (23%) | 7 (16%) | ||
| Female | 33 (77%) | 37 (84%) | ||
| SLEVEL | 87 | 0.4 | ||
| Elementary | 31 (72%) | 28 (64%) | ||
| Highschool | 12 (28%) | 16 (36%) | ||
| PRE | 87 | 60 (45, 78) | 50 (35, 75) | 0.4 |
| POST | 87 | 80 (72, 84) | 88 (80, 96) | <0.001 |
| ATT | 87 | 49 (42, 54) | 52 (45, 55) | 0.2 |
| index | 87 | 22 (12, 32) | 66 (55, 76) | <0.001 |
| 1 n (%); Median (IQR) | ||||
| 2 Pearson's Chi-squared test; Fisher's exact test; Wilcoxon rank sum test; Wilcoxon rank sum exact test | ||||
tbl_strata <-
teaching |>
tbl_strata(
strata = GROUP,
.tbl_fun =
~ .x |>
tbl_summary(by = SESSION, missing = "no") |>
add_n() |> # add column with total number of non-missing observations
add_p() |> # test for a difference between groups
modify_header(label = "**Variable**") |> # update the column header
bold_labels() ,
.header = "**{strata}**, N = {n}"
)
tbl_strata
| Variable | Control, N = 43 | Experiment, N = 44 | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| N | S1, N = 111 | S2, N = 111 | S3, N = 111 | S4, N = 101 | p-value2 | N | S1, N = 111 | S2, N = 121 | S3, N = 111 | S4, N = 101 | p-value2 | |
| PREF | 43 | 0.10 | 44 | 0.9 | ||||||||
| Auditory | 3 (27%) | 6 (55%) | 0 (0%) | 3 (30%) | 2 (18%) | 2 (17%) | 3 (27%) | 2 (20%) | ||||
| No Preference | 3 (27%) | 2 (18%) | 8 (73%) | 4 (40%) | 4 (36%) | 6 (50%) | 3 (27%) | 2 (20%) | ||||
| Tactile | 3 (27%) | 3 (27%) | 3 (27%) | 2 (20%) | 3 (27%) | 2 (17%) | 4 (36%) | 3 (30%) | ||||
| Kinesthetic | 1 (9.1%) | 0 (0%) | 0 (0%) | 1 (10%) | 0 (0%) | 0 (0%) | 1 (9.1%) | 1 (10%) | ||||
| Visual | 1 (9.1%) | 0 (0%) | 0 (0%) | 0 (0%) | 2 (18%) | 2 (17%) | 0 (0%) | 2 (20%) | ||||
| GENDER | 43 | 0.078 | 44 | >0.9 | ||||||||
| Male | 5 (45%) | 3 (27%) | 0 (0%) | 2 (20%) | 2 (18%) | 2 (17%) | 1 (9.1%) | 2 (20%) | ||||
| Female | 6 (55%) | 8 (73%) | 11 (100%) | 8 (80%) | 9 (82%) | 10 (83%) | 10 (91%) | 8 (80%) | ||||
| SLEVEL | 43 | 0.046 | 44 | 0.4 | ||||||||
| Elementary | 8 (73%) | 8 (73%) | 5 (45%) | 10 (100%) | 8 (73%) | 6 (50%) | 6 (55%) | 8 (80%) | ||||
| Highschool | 3 (27%) | 3 (27%) | 6 (55%) | 0 (0%) | 3 (27%) | 6 (50%) | 5 (45%) | 2 (20%) | ||||
| PRE | 43 | 65 (38, 75) | 50 (20, 60) | 70 (60, 80) | 55 (46, 71) | 0.3 | 44 | 40 (20, 72) | 50 (32, 52) | 70 (42, 78) | 62 (45, 74) | 0.2 |
| POST | 43 | 72 (64, 76) | 80 (76, 80) | 80 (78, 84) | 80 (77, 87) | 0.093 | 44 | 92 (80, 96) | 88 (80, 92) | 92 (82, 96) | 80 (80, 90) | 0.4 |
| ATT | 43 | 42 (38, 51) | 51 (44, 55) | 49 (42, 54) | 47 (43, 54) | 0.6 | 44 | 46 (38, 52) | 48 (43, 52) | 51 (46, 56) | 53 (51, 56) | 0.2 |
| index | 43 | 22 (9, 30) | 23 (8, 28) | 17 (12, 34) | 24 (16, 34) | 0.8 | 44 | 61 (56, 72) | 60 (52, 74) | 62 (54, 67) | 79 (73, 83) | 0.020 |
| 1 n (%); Median (IQR) | ||||||||||||
| 2 Fisher's exact test; Kruskal-Wallis rank sum test | ||||||||||||
Dummy encoding
library(fastDummies)
teaching <- dummy_columns(teaching, select_columns = c('GROUP', 'SESSION', 'PREF', 'GENDER', 'SLEVEL'), remove_selected_columns = TRUE, remove_first_dummy = TRUE)
DID (Liam, Michael & Tania)
teaching$ID <- 1:nrow(teaching)
teaching_DID <- teaching %>% slice(rep(1:n(), each = 2))
teaching_DID$POST_ind <- rep(c(0,1),nrow(teaching))
teaching_DID$OUTCOME <- ifelse(teaching_DID$POST_ind == 1, teaching_DID$POST, teaching_DID$PRE)
teaching_DID <- subset(teaching_DID, select = -c(POST, PRE))
DID without covariates
DID <- lm(OUTCOME ~ POST_ind + GROUP_Experiment + GROUP_Experiment*POST_ind, teaching_DID)
summary(DID)
FALSE
FALSE Call:
FALSE lm(formula = OUTCOME ~ POST_ind + GROUP_Experiment + GROUP_Experiment *
FALSE POST_ind, data = teaching_DID)
FALSE
FALSE Residuals:
FALSE Min 1Q Median 3Q Max
FALSE -52.674 -12.000 2.364 10.364 46.364
FALSE
FALSE Coefficients:
FALSE Estimate Std. Error t value Pr(>|t|)
FALSE (Intercept) 57.674 2.990 19.291 < 0.0000000000000002 ***
FALSE POST_ind 18.326 4.228 4.334 0.000025 ***
FALSE GROUP_Experiment -4.038 4.204 -0.961 0.3381
FALSE POST_ind:GROUP_Experiment 13.674 5.945 2.300 0.0227 *
FALSE ---
FALSE Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
FALSE
FALSE Residual standard error: 19.6 on 170 degrees of freedom
FALSE Multiple R-squared: 0.3153, Adjusted R-squared: 0.3032
FALSE F-statistic: 26.1 on 3 and 170 DF, p-value: 0.00000000000006177
plot(DID)
plot.lmSim(DID)
There is a treatment effect but no difference in the beginning between the groups.
DID with covariates
mod <- lm(OUTCOME ~ ATT + GROUP_Experiment + SESSION_S2 + SESSION_S3 + SESSION_S4 + `PREF_No Preference` + PREF_Tactile + PREF_Kinesthetic + PREF_Visual + GENDER_Female + SLEVEL_Highschool + ID + POST_ind + GROUP_Experiment * POST_ind, teaching_DID)
summary(mod)
FALSE
FALSE Call:
FALSE lm(formula = OUTCOME ~ ATT + GROUP_Experiment + SESSION_S2 +
FALSE SESSION_S3 + SESSION_S4 + `PREF_No Preference` + PREF_Tactile +
FALSE PREF_Kinesthetic + PREF_Visual + GENDER_Female + SLEVEL_Highschool +
FALSE ID + POST_ind + GROUP_Experiment * POST_ind, data = teaching_DID)
FALSE
FALSE Residuals:
FALSE Min 1Q Median 3Q Max
FALSE -45.236 -12.170 1.709 11.486 39.727
FALSE
FALSE Coefficients:
FALSE Estimate Std. Error t value Pr(>|t|)
FALSE (Intercept) 24.33401 10.09113 2.411 0.01703 *
FALSE ATT 0.58944 0.18162 3.245 0.00143 **
FALSE GROUP_Experiment -5.77073 6.81210 -0.847 0.39820
FALSE SESSION_S2 -2.06097 4.11088 -0.501 0.61682
FALSE SESSION_S3 6.53638 4.44417 1.471 0.14333
FALSE SESSION_S4 1.79008 4.58592 0.390 0.69681
FALSE `PREF_No Preference` 4.50878 3.76880 1.196 0.23334
FALSE PREF_Tactile 5.55874 4.04905 1.373 0.17173
FALSE PREF_Kinesthetic 4.52404 7.40799 0.611 0.54227
FALSE PREF_Visual -7.16461 6.13331 -1.168 0.24450
FALSE GENDER_Female 0.82544 3.74080 0.221 0.82564
FALSE SLEVEL_Highschool -2.97465 3.25297 -0.914 0.36187
FALSE ID 0.04342 0.12739 0.341 0.73364
FALSE POST_ind 18.32558 4.00856 4.572 0.00000967 ***
FALSE GROUP_Experiment:POST_ind 13.67442 5.63666 2.426 0.01639 *
FALSE ---
FALSE Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
FALSE
FALSE Residual standard error: 18.59 on 159 degrees of freedom
FALSE Multiple R-squared: 0.4244, Adjusted R-squared: 0.3737
FALSE F-statistic: 8.373 on 14 and 159 DF, p-value: 0.000000000000251
plot(mod)
plot.lmSim(mod)
Model seems fine. We check the gam for linearity (of ATT as it is the only numerical value)
# Check gam
library(gam)
FALSE Loading required package: splines
FALSE Loading required package: foreach
FALSE Loaded gam 1.22-2
mod.gam <- gam(OUTCOME ~ lo(ATT) + GROUP_Experiment + SESSION_S2 + SESSION_S3 + SESSION_S4 + `PREF_No Preference` + PREF_Tactile + PREF_Kinesthetic + PREF_Visual + GENDER_Female + SLEVEL_Highschool + ID + POST_ind + GROUP_Experiment * POST_ind, teaching_DID, family = gaussian(link = 'identity'))
plot(mod.gam, se = T)
Gam is fine. Linearity is ok.
Selection of variables
library(regclass)
FALSE Loading required package: bestglm
FALSE Loading required package: VGAM
FALSE Loading required package: stats4
FALSE
FALSE Attaching package: 'VGAM'
FALSE The following object is masked from 'package:gam':
FALSE
FALSE s
FALSE Loading required package: rpart
FALSE Loading required package: randomForest
FALSE randomForest 4.7-1.1
FALSE Type rfNews() to see new features/changes/bug fixes.
FALSE
FALSE Attaching package: 'randomForest'
FALSE The following object is masked from 'package:dplyr':
FALSE
FALSE combine
FALSE Important regclass change from 1.3:
FALSE All functions that had a . in the name now have an _
FALSE all.correlations -> all_correlations, cor.demo -> cor_demo, etc.
sort(VIF(mod), decreasing = TRUE)
FALSE GROUP_Experiment ID GROUP_Experiment:POST_ind
FALSE 5.842230 5.154437 3.023256
FALSE POST_ind SESSION_S3 SESSION_S4
FALSE 2.023256 1.879374 1.875224
FALSE `PREF_No Preference` SESSION_S2 PREF_Tactile
FALSE 1.663469 1.655288 1.605863
FALSE PREF_Visual PREF_Kinesthetic ATT
FALSE 1.401759 1.212368 1.202996
FALSE SLEVEL_Highschool GENDER_Female
FALSE 1.163226 1.108078
Shows which variables have higher importance but we don’t use this in the end
forward
mod_min <- lm(OUTCOME ~ 1, teaching_DID)
s <- step(mod_min, direction='forward', scope=formula(lm(OUTCOME ~ ATT + GROUP_Experiment + SESSION_S2 + SESSION_S3 + SESSION_S4 + `PREF_No Preference` + PREF_Tactile + PREF_Kinesthetic + PREF_Visual + GENDER_Female + SLEVEL_Highschool + ID + POST_ind + GROUP_Experiment * POST_ind, teaching_DID)))
FALSE Start: AIC=1099.43
FALSE OUTCOME ~ 1
FALSE
FALSE Df Sum of Sq RSS AIC
FALSE + POST_ind 1 27715.0 67712 1041.7
FALSE + ATT 1 5476.8 89950 1091.1
FALSE + SESSION_S3 1 2753.7 92673 1096.3
FALSE + PREF_Visual 1 1600.5 93826 1098.5
FALSE <none> 95427 1099.4
FALSE + SESSION_S2 1 1029.3 94398 1099.5
FALSE + PREF_Tactile 1 940.2 94487 1099.7
FALSE + `PREF_No Preference` 1 397.5 95029 1100.7
FALSE + GENDER_Female 1 341.1 95086 1100.8
FALSE + GROUP_Experiment 1 340.8 95086 1100.8
FALSE + SESSION_S4 1 280.1 95147 1100.9
FALSE + SLEVEL_Highschool 1 258.9 95168 1101.0
FALSE + ID 1 81.0 95346 1101.3
FALSE + PREF_Kinesthetic 1 75.3 95352 1101.3
FALSE
FALSE Step: AIC=1041.73
FALSE OUTCOME ~ POST_ind
FALSE
FALSE Df Sum of Sq RSS AIC
FALSE + ATT 1 5476.8 62235 1029.0
FALSE + SESSION_S3 1 2753.7 64958 1036.5
FALSE + PREF_Visual 1 1600.5 66111 1039.6
FALSE + SESSION_S2 1 1029.3 66683 1041.1
FALSE + PREF_Tactile 1 940.2 66772 1041.3
FALSE <none> 67712 1041.7
FALSE + `PREF_No Preference` 1 397.5 67314 1042.7
FALSE + GENDER_Female 1 341.1 67371 1042.8
FALSE + GROUP_Experiment 1 340.8 67371 1042.8
FALSE + SESSION_S4 1 280.1 67432 1043.0
FALSE + SLEVEL_Highschool 1 258.9 67453 1043.1
FALSE + ID 1 81.0 67631 1043.5
FALSE + PREF_Kinesthetic 1 75.3 67637 1043.5
FALSE
FALSE Step: AIC=1029.05
FALSE OUTCOME ~ POST_ind + ATT
FALSE
FALSE Df Sum of Sq RSS AIC
FALSE + SESSION_S3 1 2056.06 60179 1025.2
FALSE + PREF_Visual 1 1620.50 60615 1026.5
FALSE + SESSION_S2 1 1277.32 60958 1027.5
FALSE <none> 62235 1029.0
FALSE + PREF_Tactile 1 564.32 61671 1029.5
FALSE + `PREF_No Preference` 1 498.48 61737 1029.7
FALSE + SLEVEL_Highschool 1 205.11 62030 1030.5
FALSE + GENDER_Female 1 175.90 62059 1030.6
FALSE + PREF_Kinesthetic 1 135.49 62100 1030.7
FALSE + GROUP_Experiment 1 110.86 62124 1030.7
FALSE + ID 1 68.79 62166 1030.9
FALSE + SESSION_S4 1 31.52 62203 1031.0
FALSE
FALSE Step: AIC=1025.21
FALSE OUTCOME ~ POST_ind + ATT + SESSION_S3
FALSE
FALSE Df Sum of Sq RSS AIC
FALSE + PREF_Visual 1 1082.12 59097 1024.0
FALSE <none> 60179 1025.2
FALSE + SLEVEL_Highschool 1 634.63 59544 1025.4
FALSE + SESSION_S4 1 496.16 59683 1025.8
FALSE + SESSION_S2 1 440.12 59739 1025.9
FALSE + PREF_Tactile 1 440.06 59739 1025.9
FALSE + `PREF_No Preference` 1 228.42 59951 1026.5
FALSE + PREF_Kinesthetic 1 132.83 60046 1026.8
FALSE + GROUP_Experiment 1 128.36 60051 1026.8
FALSE + ID 1 112.49 60066 1026.9
FALSE + GENDER_Female 1 13.09 60166 1027.2
FALSE
FALSE Step: AIC=1024.05
FALSE OUTCOME ~ POST_ind + ATT + SESSION_S3 + PREF_Visual
FALSE
FALSE Df Sum of Sq RSS AIC
FALSE <none> 59097 1024.0
FALSE + SESSION_S2 1 515.58 58581 1024.5
FALSE + SLEVEL_Highschool 1 469.45 58627 1024.7
FALSE + SESSION_S4 1 466.08 58631 1024.7
FALSE + ID 1 455.84 58641 1024.7
FALSE + GROUP_Experiment 1 346.41 58750 1025.0
FALSE + PREF_Tactile 1 243.19 58854 1025.3
FALSE + PREF_Kinesthetic 1 88.24 59009 1025.8
FALSE + `PREF_No Preference` 1 74.19 59023 1025.8
FALSE + GENDER_Female 1 38.99 59058 1025.9
s$call
FALSE lm(formula = OUTCOME ~ POST_ind + ATT + SESSION_S3 + PREF_Visual,
FALSE data = teaching_DID)
backward
s <- step(mod, direction = 'backward')
FALSE Start: AIC=1031.33
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S2 + SESSION_S3 +
FALSE SESSION_S4 + `PREF_No Preference` + PREF_Tactile + PREF_Kinesthetic +
FALSE PREF_Visual + GENDER_Female + SLEVEL_Highschool + ID + POST_ind +
FALSE GROUP_Experiment * POST_ind
FALSE
FALSE Df Sum of Sq RSS AIC
FALSE - GENDER_Female 1 16.8 54947 1029.4
FALSE - ID 1 40.1 54971 1029.5
FALSE - SESSION_S4 1 52.6 54983 1029.5
FALSE - SESSION_S2 1 86.8 55017 1029.6
FALSE - PREF_Kinesthetic 1 128.8 55059 1029.7
FALSE - SLEVEL_Highschool 1 288.9 55219 1030.2
FALSE - PREF_Visual 1 471.4 55402 1030.8
FALSE - `PREF_No Preference` 1 494.5 55425 1030.9
FALSE <none> 54930 1031.3
FALSE - PREF_Tactile 1 651.1 55582 1031.4
FALSE - SESSION_S3 1 747.3 55678 1031.7
FALSE - GROUP_Experiment:POST_ind 1 2033.2 56964 1035.7
FALSE - ATT 1 3638.9 58569 1040.5
FALSE
FALSE Step: AIC=1029.38
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S2 + SESSION_S3 +
FALSE SESSION_S4 + `PREF_No Preference` + PREF_Tactile + PREF_Kinesthetic +
FALSE PREF_Visual + SLEVEL_Highschool + ID + POST_ind + GROUP_Experiment:POST_ind
FALSE
FALSE Df Sum of Sq RSS AIC
FALSE - ID 1 36.0 54983 1027.5
FALSE - SESSION_S4 1 60.6 55008 1027.6
FALSE - SESSION_S2 1 79.5 55027 1027.6
FALSE - PREF_Kinesthetic 1 140.5 55088 1027.8
FALSE - SLEVEL_Highschool 1 290.6 55238 1028.3
FALSE - PREF_Visual 1 458.9 55406 1028.8
FALSE - `PREF_No Preference` 1 491.2 55438 1028.9
FALSE <none> 54947 1029.4
FALSE - PREF_Tactile 1 657.1 55604 1029.5
FALSE - SESSION_S3 1 861.6 55809 1030.1
FALSE - GROUP_Experiment:POST_ind 1 2033.2 56980 1033.7
FALSE - ATT 1 3632.3 58580 1038.5
FALSE
FALSE Step: AIC=1027.5
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S2 + SESSION_S3 +
FALSE SESSION_S4 + `PREF_No Preference` + PREF_Tactile + PREF_Kinesthetic +
FALSE PREF_Visual + SLEVEL_Highschool + POST_ind + GROUP_Experiment:POST_ind
FALSE
FALSE Df Sum of Sq RSS AIC
FALSE - SESSION_S2 1 74.5 55058 1025.7
FALSE - SESSION_S4 1 109.8 55093 1025.8
FALSE - PREF_Kinesthetic 1 138.1 55121 1025.9
FALSE - SLEVEL_Highschool 1 273.4 55257 1026.4
FALSE - PREF_Visual 1 424.1 55407 1026.8
FALSE - `PREF_No Preference` 1 500.0 55483 1027.1
FALSE <none> 54983 1027.5
FALSE - PREF_Tactile 1 646.0 55629 1027.5
FALSE - SESSION_S3 1 900.4 55884 1028.3
FALSE - GROUP_Experiment:POST_ind 1 2033.2 57016 1031.8
FALSE - ATT 1 3674.0 58657 1036.8
FALSE
FALSE Step: AIC=1025.73
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + SESSION_S4 +
FALSE `PREF_No Preference` + PREF_Tactile + PREF_Kinesthetic +
FALSE PREF_Visual + SLEVEL_Highschool + POST_ind + GROUP_Experiment:POST_ind
FALSE
FALSE Df Sum of Sq RSS AIC
FALSE - PREF_Kinesthetic 1 156.6 55214 1024.2
FALSE - SESSION_S4 1 291.3 55349 1024.7
FALSE - SLEVEL_Highschool 1 305.7 55363 1024.7
FALSE - PREF_Visual 1 389.5 55447 1025.0
FALSE - `PREF_No Preference` 1 521.5 55579 1025.4
FALSE <none> 55058 1025.7
FALSE - PREF_Tactile 1 701.7 55759 1025.9
FALSE - SESSION_S3 1 1658.3 56716 1028.9
FALSE - GROUP_Experiment:POST_ind 1 2033.2 57091 1030.0
FALSE - ATT 1 3624.7 58682 1034.8
FALSE
FALSE Step: AIC=1024.23
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + SESSION_S4 +
FALSE `PREF_No Preference` + PREF_Tactile + PREF_Visual + SLEVEL_Highschool +
FALSE POST_ind + GROUP_Experiment:POST_ind
FALSE
FALSE Df Sum of Sq RSS AIC
FALSE - SESSION_S4 1 353.5 55568 1023.3
FALSE - SLEVEL_Highschool 1 374.7 55589 1023.4
FALSE - `PREF_No Preference` 1 395.5 55610 1023.5
FALSE - PREF_Visual 1 502.8 55717 1023.8
FALSE - PREF_Tactile 1 570.5 55785 1024.0
FALSE <none> 55214 1024.2
FALSE - SESSION_S3 1 1812.2 57026 1027.8
FALSE - GROUP_Experiment:POST_ind 1 2033.2 57247 1028.5
FALSE - ATT 1 3545.2 58759 1033.0
FALSE
FALSE Step: AIC=1023.34
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + `PREF_No Preference` +
FALSE PREF_Tactile + PREF_Visual + SLEVEL_Highschool + POST_ind +
FALSE GROUP_Experiment:POST_ind
FALSE
FALSE Df Sum of Sq RSS AIC
FALSE - `PREF_No Preference` 1 366.1 55934 1022.5
FALSE - PREF_Visual 1 512.3 56080 1022.9
FALSE - PREF_Tactile 1 546.1 56114 1023.0
FALSE - SLEVEL_Highschool 1 553.2 56121 1023.1
FALSE <none> 55568 1023.3
FALSE - SESSION_S3 1 1508.7 57076 1026.0
FALSE - GROUP_Experiment:POST_ind 1 2033.2 57601 1027.6
FALSE - ATT 1 4125.7 59693 1033.8
FALSE
FALSE Step: AIC=1022.48
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + PREF_Tactile +
FALSE PREF_Visual + SLEVEL_Highschool + POST_ind + GROUP_Experiment:POST_ind
FALSE
FALSE Df Sum of Sq RSS AIC
FALSE - PREF_Tactile 1 244.5 56178 1021.2
FALSE - SLEVEL_Highschool 1 566.5 56500 1022.2
FALSE <none> 55934 1022.5
FALSE - PREF_Visual 1 923.8 56858 1023.3
FALSE - SESSION_S3 1 1823.6 57757 1026.1
FALSE - GROUP_Experiment:POST_ind 1 2033.2 57967 1026.7
FALSE - ATT 1 4129.5 60063 1032.9
FALSE
FALSE Step: AIC=1021.24
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + PREF_Visual +
FALSE SLEVEL_Highschool + POST_ind + GROUP_Experiment:POST_ind
FALSE
FALSE Df Sum of Sq RSS AIC
FALSE - SLEVEL_Highschool 1 538.9 56717 1020.9
FALSE <none> 56178 1021.2
FALSE - PREF_Visual 1 1138.9 57317 1022.7
FALSE - SESSION_S3 1 1852.8 58031 1024.9
FALSE - GROUP_Experiment:POST_ind 1 2033.2 58212 1025.4
FALSE - ATT 1 4347.3 60526 1032.2
FALSE
FALSE Step: AIC=1020.9
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + PREF_Visual +
FALSE POST_ind + GROUP_Experiment:POST_ind
FALSE
FALSE Df Sum of Sq RSS AIC
FALSE <none> 56717 1020.9
FALSE - PREF_Visual 1 1300.2 58017 1022.8
FALSE - SESSION_S3 1 1489.3 58206 1023.4
FALSE - GROUP_Experiment:POST_ind 1 2033.2 58750 1025.0
FALSE - ATT 1 4543.4 61261 1032.3
s
FALSE
FALSE Call:
FALSE lm(formula = OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 +
FALSE PREF_Visual + POST_ind + GROUP_Experiment:POST_ind, data = teaching_DID)
FALSE
FALSE Coefficients:
FALSE (Intercept) ATT
FALSE 27.5544 0.6071
FALSE GROUP_Experiment SESSION_S3
FALSE -3.9340 6.8695
FALSE PREF_Visual POST_ind
FALSE -10.4343 18.3256
FALSE GROUP_Experiment:POST_ind
FALSE 13.6744
s$call
FALSE lm(formula = OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 +
FALSE PREF_Visual + POST_ind + GROUP_Experiment:POST_ind, data = teaching_DID)
Selects the best model backwards which makes some variables significant.
Combination backward and forward
s <- step(mod)
FALSE Start: AIC=1031.33
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S2 + SESSION_S3 +
FALSE SESSION_S4 + `PREF_No Preference` + PREF_Tactile + PREF_Kinesthetic +
FALSE PREF_Visual + GENDER_Female + SLEVEL_Highschool + ID + POST_ind +
FALSE GROUP_Experiment * POST_ind
FALSE
FALSE Df Sum of Sq RSS AIC
FALSE - GENDER_Female 1 16.8 54947 1029.4
FALSE - ID 1 40.1 54971 1029.5
FALSE - SESSION_S4 1 52.6 54983 1029.5
FALSE - SESSION_S2 1 86.8 55017 1029.6
FALSE - PREF_Kinesthetic 1 128.8 55059 1029.7
FALSE - SLEVEL_Highschool 1 288.9 55219 1030.2
FALSE - PREF_Visual 1 471.4 55402 1030.8
FALSE - `PREF_No Preference` 1 494.5 55425 1030.9
FALSE <none> 54930 1031.3
FALSE - PREF_Tactile 1 651.1 55582 1031.4
FALSE - SESSION_S3 1 747.3 55678 1031.7
FALSE - GROUP_Experiment:POST_ind 1 2033.2 56964 1035.7
FALSE - ATT 1 3638.9 58569 1040.5
FALSE
FALSE Step: AIC=1029.38
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S2 + SESSION_S3 +
FALSE SESSION_S4 + `PREF_No Preference` + PREF_Tactile + PREF_Kinesthetic +
FALSE PREF_Visual + SLEVEL_Highschool + ID + POST_ind + GROUP_Experiment:POST_ind
FALSE
FALSE Df Sum of Sq RSS AIC
FALSE - ID 1 36.0 54983 1027.5
FALSE - SESSION_S4 1 60.6 55008 1027.6
FALSE - SESSION_S2 1 79.5 55027 1027.6
FALSE - PREF_Kinesthetic 1 140.5 55088 1027.8
FALSE - SLEVEL_Highschool 1 290.6 55238 1028.3
FALSE - PREF_Visual 1 458.9 55406 1028.8
FALSE - `PREF_No Preference` 1 491.2 55438 1028.9
FALSE <none> 54947 1029.4
FALSE - PREF_Tactile 1 657.1 55604 1029.5
FALSE - SESSION_S3 1 861.6 55809 1030.1
FALSE - GROUP_Experiment:POST_ind 1 2033.2 56980 1033.7
FALSE - ATT 1 3632.3 58580 1038.5
FALSE
FALSE Step: AIC=1027.5
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S2 + SESSION_S3 +
FALSE SESSION_S4 + `PREF_No Preference` + PREF_Tactile + PREF_Kinesthetic +
FALSE PREF_Visual + SLEVEL_Highschool + POST_ind + GROUP_Experiment:POST_ind
FALSE
FALSE Df Sum of Sq RSS AIC
FALSE - SESSION_S2 1 74.5 55058 1025.7
FALSE - SESSION_S4 1 109.8 55093 1025.8
FALSE - PREF_Kinesthetic 1 138.1 55121 1025.9
FALSE - SLEVEL_Highschool 1 273.4 55257 1026.4
FALSE - PREF_Visual 1 424.1 55407 1026.8
FALSE - `PREF_No Preference` 1 500.0 55483 1027.1
FALSE <none> 54983 1027.5
FALSE - PREF_Tactile 1 646.0 55629 1027.5
FALSE - SESSION_S3 1 900.4 55884 1028.3
FALSE - GROUP_Experiment:POST_ind 1 2033.2 57016 1031.8
FALSE - ATT 1 3674.0 58657 1036.8
FALSE
FALSE Step: AIC=1025.73
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + SESSION_S4 +
FALSE `PREF_No Preference` + PREF_Tactile + PREF_Kinesthetic +
FALSE PREF_Visual + SLEVEL_Highschool + POST_ind + GROUP_Experiment:POST_ind
FALSE
FALSE Df Sum of Sq RSS AIC
FALSE - PREF_Kinesthetic 1 156.6 55214 1024.2
FALSE - SESSION_S4 1 291.3 55349 1024.7
FALSE - SLEVEL_Highschool 1 305.7 55363 1024.7
FALSE - PREF_Visual 1 389.5 55447 1025.0
FALSE - `PREF_No Preference` 1 521.5 55579 1025.4
FALSE <none> 55058 1025.7
FALSE - PREF_Tactile 1 701.7 55759 1025.9
FALSE - SESSION_S3 1 1658.3 56716 1028.9
FALSE - GROUP_Experiment:POST_ind 1 2033.2 57091 1030.0
FALSE - ATT 1 3624.7 58682 1034.8
FALSE
FALSE Step: AIC=1024.23
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + SESSION_S4 +
FALSE `PREF_No Preference` + PREF_Tactile + PREF_Visual + SLEVEL_Highschool +
FALSE POST_ind + GROUP_Experiment:POST_ind
FALSE
FALSE Df Sum of Sq RSS AIC
FALSE - SESSION_S4 1 353.5 55568 1023.3
FALSE - SLEVEL_Highschool 1 374.7 55589 1023.4
FALSE - `PREF_No Preference` 1 395.5 55610 1023.5
FALSE - PREF_Visual 1 502.8 55717 1023.8
FALSE - PREF_Tactile 1 570.5 55785 1024.0
FALSE <none> 55214 1024.2
FALSE - SESSION_S3 1 1812.2 57026 1027.8
FALSE - GROUP_Experiment:POST_ind 1 2033.2 57247 1028.5
FALSE - ATT 1 3545.2 58759 1033.0
FALSE
FALSE Step: AIC=1023.34
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + `PREF_No Preference` +
FALSE PREF_Tactile + PREF_Visual + SLEVEL_Highschool + POST_ind +
FALSE GROUP_Experiment:POST_ind
FALSE
FALSE Df Sum of Sq RSS AIC
FALSE - `PREF_No Preference` 1 366.1 55934 1022.5
FALSE - PREF_Visual 1 512.3 56080 1022.9
FALSE - PREF_Tactile 1 546.1 56114 1023.0
FALSE - SLEVEL_Highschool 1 553.2 56121 1023.1
FALSE <none> 55568 1023.3
FALSE - SESSION_S3 1 1508.7 57076 1026.0
FALSE - GROUP_Experiment:POST_ind 1 2033.2 57601 1027.6
FALSE - ATT 1 4125.7 59693 1033.8
FALSE
FALSE Step: AIC=1022.48
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + PREF_Tactile +
FALSE PREF_Visual + SLEVEL_Highschool + POST_ind + GROUP_Experiment:POST_ind
FALSE
FALSE Df Sum of Sq RSS AIC
FALSE - PREF_Tactile 1 244.5 56178 1021.2
FALSE - SLEVEL_Highschool 1 566.5 56500 1022.2
FALSE <none> 55934 1022.5
FALSE - PREF_Visual 1 923.8 56858 1023.3
FALSE - SESSION_S3 1 1823.6 57757 1026.1
FALSE - GROUP_Experiment:POST_ind 1 2033.2 57967 1026.7
FALSE - ATT 1 4129.5 60063 1032.9
FALSE
FALSE Step: AIC=1021.24
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + PREF_Visual +
FALSE SLEVEL_Highschool + POST_ind + GROUP_Experiment:POST_ind
FALSE
FALSE Df Sum of Sq RSS AIC
FALSE - SLEVEL_Highschool 1 538.9 56717 1020.9
FALSE <none> 56178 1021.2
FALSE - PREF_Visual 1 1138.9 57317 1022.7
FALSE - SESSION_S3 1 1852.8 58031 1024.9
FALSE - GROUP_Experiment:POST_ind 1 2033.2 58212 1025.4
FALSE - ATT 1 4347.3 60526 1032.2
FALSE
FALSE Step: AIC=1020.9
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + PREF_Visual +
FALSE POST_ind + GROUP_Experiment:POST_ind
FALSE
FALSE Df Sum of Sq RSS AIC
FALSE <none> 56717 1020.9
FALSE - PREF_Visual 1 1300.2 58017 1022.8
FALSE - SESSION_S3 1 1489.3 58206 1023.4
FALSE - GROUP_Experiment:POST_ind 1 2033.2 58750 1025.0
FALSE - ATT 1 4543.4 61261 1032.3
s
FALSE
FALSE Call:
FALSE lm(formula = OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 +
FALSE PREF_Visual + POST_ind + GROUP_Experiment:POST_ind, data = teaching_DID)
FALSE
FALSE Coefficients:
FALSE (Intercept) ATT
FALSE 27.5544 0.6071
FALSE GROUP_Experiment SESSION_S3
FALSE -3.9340 6.8695
FALSE PREF_Visual POST_ind
FALSE -10.4343 18.3256
FALSE GROUP_Experiment:POST_ind
FALSE 13.6744
s$call
FALSE lm(formula = OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 +
FALSE PREF_Visual + POST_ind + GROUP_Experiment:POST_ind, data = teaching_DID)
Model
Based on forward and backward selection as well as the combination we fit the following model
mod2 <- lm(formula = lm(formula = OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + PREF_Visual + POST_ind + GROUP_Experiment:POST_ind, data = teaching_DID),
data = teaching_DID)
summary(mod2)
FALSE
FALSE Call:
FALSE lm(formula = lm(formula = OUTCOME ~ ATT + GROUP_Experiment +
FALSE SESSION_S3 + PREF_Visual + POST_ind + GROUP_Experiment:POST_ind,
FALSE data = teaching_DID), data = teaching_DID)
FALSE
FALSE Residuals:
FALSE Min 1Q Median 3Q Max
FALSE -47.446 -9.294 1.376 10.252 45.416
FALSE
FALSE Coefficients:
FALSE Estimate Std. Error t value Pr(>|t|)
FALSE (Intercept) 27.5544 8.2781 3.329 0.001074 **
FALSE ATT 0.6071 0.1660 3.658 0.000341 ***
FALSE GROUP_Experiment -3.9340 4.0090 -0.981 0.327874
FALSE SESSION_S3 6.8695 3.2804 2.094 0.037762 *
FALSE PREF_Visual -10.4343 5.3329 -1.957 0.052063 .
FALSE POST_ind 18.3256 3.9745 4.611 0.00000794 ***
FALSE GROUP_Experiment:POST_ind 13.6744 5.5887 2.447 0.015450 *
FALSE ---
FALSE Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
FALSE
FALSE Residual standard error: 18.43 on 167 degrees of freedom
FALSE Multiple R-squared: 0.4056, Adjusted R-squared: 0.3843
FALSE F-statistic: 19 on 6 and 167 DF, p-value: < 0.00000000000000022
plot(mod2)
plot.lmSim(mod2)
We see there is no difference between the groups in the beginning. There is a difference between the times (post and pre). There is a treatment effect (interaction term). ATT higher better score. Session 3 seems to be better taught. It could be that visual preference has a negative effect.
Williams, Dakota, Ilyana
\[ We\ build\ a\ GLM\ model:\\ \\POST\ = GROUP + PREF + PRE \]
# Convert GROUP and PREF to factors
#data$GROUP <- factor(data$GROUP)
#data$PREF <- factor(data$PREF)
data <- read_excel("Bteaching.xls")
library(ggplot2)
FALSE
FALSE Attaching package: 'ggplot2'
FALSE The following object is masked from 'package:randomForest':
FALSE
FALSE margin
data$GENDER <- ifelse(data$GENDER == "M", 0, 1)
data$GROUP <- ifelse(data$GROUP == "C", 0, 1)
data$SLEVEL <- ifelse(data$SLEVEL == "E", 0, 1)
data$SESSION <- ifelse(data$SESSION == "S1", 1,
ifelse(data$SESSION == "S2", 2,
ifelse(data$SESSION == "S3", 3,
ifelse(data$SESSION == "S4", 4, NA))))
data$PREF <- ifelse(data$PREF == "A", 1,
ifelse(data$PREF == "K", 2,
ifelse(data$PREF == "N", 3,
ifelse(data$PREF == "T", 4,
ifelse(data$PREF == "V", 5, NA)))))
colnames(data)[1] <- "Group"
colnames(data)[2] <- "Session"
colnames(data)[3] <- "Preference"
colnames(data)[4] <- "Gender"
colnames(data)[5] <- "School Level"
colnames(data)[6] <- "Pre Test Score"
colnames(data)[7] <- "Post Test Score"
colnames(data)[8] <- "Attention Test Score"
require(corrplot)
FALSE Loading required package: corrplot
FALSE corrplot 0.92 loaded
#2B3433, #2F4F4E, #0D8387, #CBE4DE
#("#FF0000", "#FFFFFF", "#0000FF")
cor_mat <- cor(data[, 7], data[, -7], method = "spearman")
col <- colorRampPalette(c("#2B3433", "#FFFFFF", "#0D8387"))(25)
corrplot(cor_mat, col = col, tl.col = "black",
tl.srt = 40, tl.cex = 1.2,
title = "Correlation Matrix", mar = c(0, 0, 1, 0))
#I have this corrplot i want to order the correlation circles from highest to lowest correlation
# Select the 7th column of the data and compute the correlation matrix
data <- read_excel("Bteaching.xls")
data$SLEVEL <- factor(data$SLEVEL, level = c('E', 'H'), labels = c('Elementary', 'Highschool'))
data$PREF <- factor(data$PREF, level = c('A', 'N', 'T', 'K', 'V'), labels = c('A', 'N', 'T', 'K', 'V'))
data$GROUP <- factor(data$GROUP)
modelGLMAAA <- glm(formula = POST ~ GROUP + PRE, data = data, family =gaussian(link = "identity"))
summary(modelGLMAAA)
FALSE
FALSE Call:
FALSE glm(formula = POST ~ GROUP + PRE, family = gaussian(link = "identity"),
FALSE data = data)
FALSE
FALSE Deviance Residuals:
FALSE Min 1Q Median 3Q Max
FALSE -29.993 -4.946 1.355 6.305 19.804
FALSE
FALSE Coefficients:
FALSE Estimate Std. Error t value Pr(>|t|)
FALSE (Intercept) 65.04353 2.93762 22.142 < 0.0000000000000002 ***
FALSE GROUPE 10.40348 2.18015 4.772 0.00000762 ***
FALSE PRE 0.18997 0.04332 4.386 0.00003332 ***
FALSE ---
FALSE Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
FALSE
FALSE (Dispersion parameter for gaussian family taken to be 102.7003)
FALSE
FALSE Null deviance: 12621.6 on 86 degrees of freedom
FALSE Residual deviance: 8626.8 on 84 degrees of freedom
FALSE AIC: 654.81
FALSE
FALSE Number of Fisher Scoring iterations: 2
data <- read_excel("Bteaching.xls")
data$GENDER <- ifelse(data$GENDER == "M", 0, 1)
data$GROUP <- ifelse(data$GROUP == "C", 0, 1)
data$SESSION <- ifelse(data$SESSION == "S1", 1,
ifelse(data$SESSION == "S2", 2,
ifelse(data$SESSION == "S3", 3,
ifelse(data$SESSION == "S4", 4, NA))))
bn <- subset(data, PRE < 56)
bf <- subset(data, PRE > 57)
bn$SLEVEL <- factor(bn$SLEVEL, level = c('E', 'H'), labels = c('Elementary', 'Highschool'))
bn$PREF <- factor(bn$PREF, level = c('A', 'N', 'T', 'K', 'V'), labels = c('A', 'N', 'T', 'K', 'V'))
bn$GROUP <- factor(bn$GROUP)
bf$SLEVEL <- factor(bf$SLEVEL, level = c('E', 'H'), labels = c('Elementary', 'Highschool'))
bf$PREF <- factor(bf$PREF, level = c('A', 'N', 'T', 'K', 'V'), labels = c('A', 'N', 'T', 'K', 'V'))
bf$GROUP <- factor(bf$GROUP)
modelGLMbn <- glm(formula = POST ~ GROUP + PRE , data = bn, family =gaussian(link = "identity"))
summary(modelGLMbn)
FALSE
FALSE Call:
FALSE glm(formula = POST ~ GROUP + PRE, family = gaussian(link = "identity"),
FALSE data = bn)
FALSE
FALSE Deviance Residuals:
FALSE Min 1Q Median 3Q Max
FALSE -24.848 -7.213 -1.311 9.017 20.925
FALSE
FALSE Coefficients:
FALSE Estimate Std. Error t value Pr(>|t|)
FALSE (Intercept) 59.1553 4.7677 12.408 0.00000000000000182 ***
FALSE GROUP1 14.2274 3.3787 4.211 0.000136 ***
FALSE PRE 0.3385 0.1100 3.076 0.003725 **
FALSE ---
FALSE Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
FALSE
FALSE (Dispersion parameter for gaussian family taken to be 121.9993)
FALSE
FALSE Null deviance: 7882.2 on 43 degrees of freedom
FALSE Residual deviance: 5002.0 on 41 degrees of freedom
FALSE AIC: 341.14
FALSE
FALSE Number of Fisher Scoring iterations: 2
modelGLMbf <- glm(formula = POST ~ GROUP + PRE , data = bf, family =gaussian(link = "identity"))
summary(modelGLMbf)
FALSE
FALSE Call:
FALSE glm(formula = POST ~ GROUP + PRE, family = gaussian(link = "identity"),
FALSE data = bf)
FALSE
FALSE Deviance Residuals:
FALSE Min 1Q Median 3Q Max
FALSE -20.544 -3.983 1.490 4.742 15.456
FALSE
FALSE Coefficients:
FALSE Estimate Std. Error t value Pr(>|t|)
FALSE (Intercept) 52.0628 8.5897 6.061 0.000000388 ***
FALSE GROUP1 7.0986 2.5775 2.754 0.00881 **
FALSE PRE 0.3626 0.1090 3.326 0.00189 **
FALSE ---
FALSE Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
FALSE
FALSE (Dispersion parameter for gaussian family taken to be 71.37899)
FALSE
FALSE Null deviance: 4178.6 on 42 degrees of freedom
FALSE Residual deviance: 2855.2 on 40 degrees of freedom
FALSE AIC: 310.44
FALSE
FALSE Number of Fisher Scoring iterations: 2
data <- read_excel("Bteaching.xls")
data$GENDER <- ifelse(data$GENDER == "M", 0, 1)
data$GROUP <- ifelse(data$GROUP == "C", 0, 1)
data$SESSION <- ifelse(data$SESSION == "S1", 1,
ifelse(data$SESSION == "S2", 2,
ifelse(data$SESSION == "S3", 3,
ifelse(data$SESSION == "S4", 4, NA))))
bE <- subset(data, SLEVEL == "E")
bH <- subset(data, SLEVEL == "H")
bE$SLEVEL <- factor(bE$SLEVEL, level = c('E', 'H'), labels = c('Elementary', 'Highschool'))
bE$PREF <- factor(bE$PREF, level = c('A', 'N', 'T', 'K', 'V'), labels = c('A', 'N', 'T', 'K', 'V'))
bE$GROUP <- factor(bE$GROUP)
bH$SLEVEL <- factor(bH$SLEVEL, level = c('E', 'H'), labels = c('Elementary', 'Highschool'))
bH$PREF <- factor(bH$PREF, level = c('A', 'N', 'T', 'K', 'V'), labels = c('A', 'N', 'T', 'K', 'V'))
bH$GROUP <- factor(bH$GROUP)
modelGLMbE <- glm(formula = POST ~ GROUP + PRE, data = bE, family =gaussian(link = "identity"))
summary(modelGLMbE)
FALSE
FALSE Call:
FALSE glm(formula = POST ~ GROUP + PRE, family = gaussian(link = "identity"),
FALSE data = bE)
FALSE
FALSE Deviance Residuals:
FALSE Min 1Q Median 3Q Max
FALSE -30.687 -4.667 1.333 6.425 20.731
FALSE
FALSE Coefficients:
FALSE Estimate Std. Error t value Pr(>|t|)
FALSE (Intercept) 65.76547 3.47571 18.921 < 0.0000000000000002 ***
FALSE GROUP1 8.58190 2.71125 3.165 0.002506 **
FALSE PRE 0.18430 0.04968 3.710 0.000478 ***
FALSE ---
FALSE Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
FALSE
FALSE (Dispersion parameter for gaussian family taken to be 107.8641)
FALSE
FALSE Null deviance: 8482.7 on 58 degrees of freedom
FALSE Residual deviance: 6040.4 on 56 degrees of freedom
FALSE AIC: 448.53
FALSE
FALSE Number of Fisher Scoring iterations: 2
modelGLMbH<- glm(formula = POST ~ GROUP + PRE, data = bH, family =gaussian(link = "identity"))
summary(modelGLMbH)
FALSE
FALSE Call:
FALSE glm(formula = POST ~ GROUP + PRE, family = gaussian(link = "identity"),
FALSE data = bH)
FALSE
FALSE Deviance Residuals:
FALSE Min 1Q Median 3Q Max
FALSE -21.0875 -3.9235 0.7582 6.4303 18.4303
FALSE
FALSE Coefficients:
FALSE Estimate Std. Error t value Pr(>|t|)
FALSE (Intercept) 61.54648 5.93420 10.371 0.000000000153 ***
FALSE GROUP1 14.12158 3.76730 3.748 0.000942 ***
FALSE PRE 0.23606 0.09636 2.450 0.021639 *
FALSE ---
FALSE Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
FALSE
FALSE (Dispersion parameter for gaussian family taken to be 95.66149)
FALSE
FALSE Null deviance: 4109.7 on 27 degrees of freedom
FALSE Residual deviance: 2391.5 on 25 degrees of freedom
FALSE AIC: 211.99
FALSE
FALSE Number of Fisher Scoring iterations: 2
data <- read_excel("Bteaching.xls")
data$GENDER <- factor(data$GENDER)
data$GROUP <- factor(data$GROUP)
data$SLEVEL <- factor(data$SLEVEL)
data$SESSION <- factor(data$SESSION)
data$PREF <- factor(data$PREF)
#data$PREF1A <- ifelse(data$PREF == "A", 1, 10)
#data$PREF2K <- ifelse(data$PREF == "K", 2, 10)
#data$PREF3N <- ifelse(data$PREF == "N", 3, 10)
#data$PREF5V <- ifelse(data$PREF == "V", 5, 10)
#data$PREF4T <- ifelse(data$PREF == "T", 4, 10)
modelGLM2 <- glm(POST ~ PRE + PREF + GENDER, data = data, family =gaussian(link = "identity"))
summary(modelGLM2)
FALSE
FALSE Call:
FALSE glm(formula = POST ~ PRE + PREF + GENDER, family = gaussian(link = "identity"),
FALSE data = data)
FALSE
FALSE Deviance Residuals:
FALSE Min 1Q Median 3Q Max
FALSE -28.387 -7.970 1.110 7.002 28.830
FALSE
FALSE Coefficients:
FALSE Estimate Std. Error t value Pr(>|t|)
FALSE (Intercept) 70.91293 3.50008 20.260 < 0.0000000000000002 ***
FALSE PRE 0.21085 0.05056 4.170 0.0000766 ***
FALSE PREFK -10.14510 6.14079 -1.652 0.1024
FALSE PREFN -2.56565 3.16427 -0.811 0.4199
FALSE PREFT -0.01015 3.40394 -0.003 0.9976
FALSE PREFV 7.74824 4.89686 1.582 0.1175
FALSE GENDERM -5.01453 3.01101 -1.665 0.0997 .
FALSE ---
FALSE Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
FALSE
FALSE (Dispersion parameter for gaussian family taken to be 121.7772)
FALSE
FALSE Null deviance: 12621.6 on 86 degrees of freedom
FALSE Residual deviance: 9742.2 on 80 degrees of freedom
FALSE AIC: 673.39
FALSE
FALSE Number of Fisher Scoring iterations: 2
plot(modelGLM2)
library(ggplot2)
btc <- subset(BB, GROUP == "C")
bte <- subset(BB, GROUP == "E")
# Combine the data into a data frame
dataCE <- data.frame(group = c(rep("Control", length(btc$POST)), rep("Experiment", length(bte$POST))), value = c(btc$POST, bte$POST))
# Create the box plot graph
ggplot(dataCE, aes(x = group, y = value, fill = group)) + geom_boxplot() + theme_classic()
# Combine the data into a data frame
dataCE <- data.frame(group = c(rep("Control", length(btc$POST)), rep("Experiment", length(bte$POST))), value = c(btc$POST, bte$POST))
# Data frame for each preference for the control group
bc <- subset(BB, GROUP == "C")
bta <- subset(bc, PREF == "A")
btk <- subset(bc, PREF == "K")
btn <- subset(bc, PREF == "N")
btt <- subset(bc, PREF == "T")
btv <- subset(bc, PREF == "V")
dataAKNTV <- data.frame(group = c(rep("Auditory", length(bta$POST)), rep("Kinesthetic",length(btk$POST)), rep("No preference", length(btn$POST)), rep("Tactile", length(btt$POST)), rep("Visual", length(btv$POST))), value = c(bta$POST, btk$POST, btn$POST, btt$POST, btv$POST))
ggplot(dataAKNTV, aes(x = group, y = value, fill = group)) + geom_boxplot() + theme_classic()
# Data frame for each preference for the experiment group
be <- subset(BB, GROUP == "E")
bta <- subset(be, PREF == "A")
btk <- subset(be, PREF == "K")
btn <- subset(be, PREF == "N")
btt <- subset(be, PREF == "T")
btv <- subset(be, PREF == "V")
dataAKNTV <- data.frame(group = c(rep("Auditory", length(bta$POST)), rep("Kinesthetic",length(btk$POST)), rep("No preference", length(btn$POST)), rep("Tactile", length(btt$POST)), rep("Visual", length(btv$POST))), value = c(bta$POST, btk$POST, btn$POST, btt$POST, btv$POST))
ggplot(dataAKNTV, aes(x = group, y = value, fill = group)) + geom_boxplot() + theme_classic()
btm <- subset(BB, GENDER == "M")
btf <- subset(BB, GENDER == "F")
# Combine the data into a data frame
dataMF <- data.frame(group = c(rep("Male", length(btm$POST)), rep("Female", length(btf$POST))), value = c(btm$POST, btf$POST))
# Create the box plot graph
ggplot(dataMF, aes(x = group, y = value, fill = group)) + geom_boxplot() + theme_classic()
mean(btf$POST)-mean(btm$POST)
FALSE [1] 5.179832
mean(btf$PRE)-mean(btm$PRE)
FALSE [1] 1.882353
mean(btm$POST)
FALSE [1] 76.70588
We have a dataset of 87 students, with 8 variables. (qualitative and quatitative) – GROUP: C = ‘Control’ (i.e. traditional teaching methods), E = ‘Experiment’ (i.e. incorporated learning styles); – SESSION: students were randomly assigned to 1 of 4 sessions (denoted by S1 to S4) within each GROUP; – PREF: students’ learning style preference (T = ‘Tactile’, K = ‘Kinesthetic’, A = ‘Auditory’, V = ‘Visual’, N = ‘No preference’); – GENDER: M = male, F = female; – SLEVEL: school level (E = ‘Elementary, H = ’High School’); – PRE: pre-test score (out of 100); – POST: post-test score (out of 100); – ATT: attitude scale score (out of 60).
We want to know if the students in the experiment group who were able to choose a PREF scored higher in a POST test than the students who were in a control group. PREF is the response variable. Build a model in R to answer the question. The client wants you to investigate his research hypothesis that students in training sessions that utilise a processing activity that matches the students’ perceptual learning style preferences will demonstrate greater long-term retention of content than students in a traditional setting that has not utilised that processing activity. Build a model in R considering t
data <- read_excel("Bteaching.xls")
# Create a new variable for matching learning style preference
data$MATCH <- ifelse(data$PREF == "N", "ALL", data$PREF)
# Split the data into Control and Experiment groups
control_data <- subset(data, GROUP == "C")
experiment_data <- subset(data, GROUP == "E")
# Calculate the mean post-test scores for each group and learning style preference
control_means <- aggregate(control_data$POST, by = list(control_data$MATCH), FUN = mean)
experiment_means <- aggregate(experiment_data$POST, by = list(experiment_data$MATCH), FUN = mean)